paretoDens <- function(x, location, scale, shape, log = TRUE) {
if(any(x < location)) {
stop("Some observations are below the lower bound (location parameter)!")
}
logdens <- - log(scale) - (1 / shape + 1) * log(1 + shape * (x - location) / scale)
if(!log) {
return(exp(logdens))
} else {
return(logdens)
}
}
paretoLogLik <- function(params, x, location = NULL, weights = NULL, barrier = 0) {
if(is.null(weights)) {
weights <- rep(1, length(x))
}
if(is.null(location)) {
location <- min(x)
}
loglik <- paretoDens(x, location, params[1], params[2], TRUE)
loglik <- sum(weights * loglik)
penalty <- - log(params[2]) * barrier * sum(weights)
# print(c(params, -loglik))
return(-loglik + penalty)
}
paretoGrad <- function(params, x, location = NULL, weights = NULL, barrier = 0.005) {
if(is.null(weights)) {
weights <- rep(1, length(x))
}
if(is.null(location)) {
location <- min(x)
}
scale <- params[1]
shape <- params[2]
z <- (x - location) / scale
dscale <- -1/scale + z / (scale^2 + scale^2 * shape * z) * (1/shape + 1)
dshape <- 1/shape^2 * log(1 + shape * z) - (1/shape + 1) * z / (1 + shape * z)
dscale <- sum(weights * dscale)
dshape <- sum(weights * dshape)
penalty <- - sum(weights) / shape * barrier
return(-c(dscale + penalty, dshape))
}
paretoML <- function(x, location = NULL, weights = NULL, barrier = 0) {
if(is.null(weights)) {
weights <- rep(1, length(x))
}
if(is.null(location)) {
location <- min(x)
}
init <- c(sd(x), 1)
fit <- optim(init, fn = paretoLogLik, gr = paretoGrad,
x = x, location = location, weights = weights,
barrier = barrier,
lower = 10^-12,
method = "L-BFGS-B")
return(fit)
}
n <- 10^4
threshold <- 2
mprob <- c(0.8, 0.2, 0)
muNorm <- rnorm(n, mean = 0, sd = 0.25)
muExp <- rnorm(n, mean = 0, sd = 2) #rexp(n, rate = 0.5) * (1 - 2 * rbinom(n, 1, 0.5))
muGamma <- rgamma(n, 1, 3)
memberships <- t(rmultinom(n, 1, mprob))
mu <- rowSums(memberships * cbind(muNorm, muExp, muGamma))
z <- mu + rnorm(n)
keep <- z > threshold
x <- z[keep]
fit <- paretoML(x, location = threshold, barrier = 0.005)
scale <- fit$par[1]
shape <- fit$par[2]
par(mfrow = c(1, 1))
plot(density(x), ylim = c(0, 1))
lines(sort(x), paretoDens(sort(x), threshold, scale, shape, FALSE), type = "l", col = "blue")
library(gPdtest)
gpdfit <- gpd.fit(x, "amle")
lines(sort(x), paretoDens(sort(x), threshold, gpdfit[2], gpdfit[1], FALSE), type = "l", col = "red")
mixfit <- normParetoMixEM(x, threshold, emIterations = 200, barrier = 0)
mixdens <- normParetoMixDens(x, mixfit)
lines(sort(x), normParetoMixDens(sort(x), mixfit), col = "green")
# Tweedy -------
npTweed <- normParetoTweedy(mixfit)
location <- threshold
scale <- fit$par[1]
shape <- fit$par[2]
tweedy <- paretoTweedy(x, location, scale, shape)
mean(tweedy - mu[keep])
mean(x - mu[keep])
mean(npTweed - mu[keep])
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.